# Updating examples to use the newer mlr3 library, since mlr is deprecated.
# Switch controlling which modelling API the rest of the script uses:
# TRUE loads the mlr3 stack, FALSE loads the deprecated mlr package.
use_mlr3 <- FALSE
# One-time installs (e1071 provides the naive Bayes / SVM implementations
# that the learners below wrap):
# install.packages("mlr3", dependencies = TRUE)
# install.packages("e1071", dependencies = TRUE)
# install.packages("mlr3learners", dependencies = TRUE)
if (use_mlr3) {
library(mlr3)
library(mlr3learners)
} else {
library(mlr)
}
# Load tidyverse after the modelling package so its verbs (tibble, gather,
# ggplot2) are available below.
library(tidyverse)
# Captured console output from an earlier chunk summarising votesTib
# (presumably per-column counts, e.g. of missing values, followed by a second
# per-column summary) -- the producing call is not in this chunk; votesTib
# itself is also created in an earlier chunk. TODO confirm against the
# original notebook.
## Class V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
## 0 12 48 11 11 15 11 14 15 22 7 21 31
## V13 V14 V15 V16
## 25 17 28 104
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
## 39295 40948 54745 39325 47018 63069 51708 53983 44716 49698 33925 37832 46163
## V14 V15 V16
## 54905 37360 59743
# Reshape votesTib from wide (one column per vote, V1..V16) to long format so
# each vote/value pair becomes a row, keeping Class as an identifier.
# pivot_longer() supersedes the retired gather(); it yields the same
# Class/Variable/Value columns (row order differs, which the plot below does
# not depend on).
votesUntidy <- pivot_longer(votesTib, cols = -Class,
                            names_to = "Variable", values_to = "Value")

# One panel per vote; position = "fill" normalises each bar to 1, so bars
# show the proportion of each vote value within each party.
ggplot(votesUntidy, aes(Class, fill = Value)) +
  facet_wrap(~ Variable, scales = "free_y") +
  geom_bar(position = "fill") +
  theme_bw()

## Section 6.2.3
# Build the classification task (target = Class) and train a naive Bayes
# learner, via whichever API the use_mlr3 flag selects.
if (use_mlr3) {
# votesTask <- makeClassifTask(data = votesTib, target = "Class")
votesTask <- TaskClassif$new(id = "votes", backend = votesTib, target = "Class")
# bayes <- makeLearner("classif.naiveBayes")
bayesLearner <- lrn("classif.naive_bayes")
# NOTE(review): in mlr3, $train() fits the model inside the learner and
# returns the Learner object itself, so bayesModel here is the Learner, not
# an mlr WrappedModel -- the mlr-style predict() call further down will not
# work in this branch. Confirm before enabling use_mlr3.
bayesModel <- bayesLearner$train(votesTask)
} else {
votesTask <- makeClassifTask(data = votesTib, target = "Class")
bayes <- makeLearner("classif.naiveBayes")
bayesModel <- train(bayes, votesTask)
}
# Captured output from the mlr branch: makeClassifTask() coerces the tibble
# to a plain data.frame and warns about the conversion (harmless).
## Warning in makeTask(type = type, data = data, weights = weights, blocking =
## blocking, : Provided data is not a pure data.frame but from class tbl_df, hence
## it will be converted.
# Cross-validate the naive Bayes learner: 10-fold CV repeated 50 times,
# stratified by the Class target.
if (use_mlr3) {
  # mlr3 equivalent of mlr's "RepCV" description. Stratification is requested
  # through the task's column roles rather than on the resampling object.
  # NOTE(review): this mutates votesTask in place (R6 reference semantics).
  votesTask$col_roles$stratum <- "Class"
  kFold <- rsmp("repeated_cv", folds = 10, repeats = 50)
  bayesCV <- resample(task = votesTask, learner = bayesLearner,
                      resampling = kFold)
  # classif.ce is mlr's mmce; aggregate over all 500 resampling iterations.
  bayesCV$aggregate(msrs(c("classif.ce", "classif.acc",
                           "classif.fpr", "classif.fnr")))
} else {
kFold <- makeResampleDesc(method = "RepCV", folds = 10, reps = 50, stratify = TRUE)
bayesCV <- resample(learner = bayes, task = votesTask, resampling = kFold,
measures = list(mmce, acc, fpr, fnr))
bayesCV$aggr
}
# Captured aggregate performance from the mlr branch:
## mmce.test.mean acc.test.mean fpr.test.mean fnr.test.mean
## 0.09856549 0.90143451 0.08267647 0.10856980
# A single hypothetical politician's 16 votes ("y"/"n"), using the same
# column names as votesTib so the trained model can score the row.
politician <- tibble(V1 = "n", V2 = "n", V3 = "y", V4 = "n", V5 = "n", V6 = "y",
V7 = "y", V8 = "y", V9 = "y", V10 = "y", V11 = "n", V12 = "y",
V13 = "n", V14 = "n", V15 = "y", V16 = "n")
# NOTE(review): predict() on a WrappedModel and getPredictionResponse() are
# mlr functions, so this only runs in the use_mlr3 = FALSE branch; the mlr3
# equivalent would be bayesLearner$predict_newdata(politician)$response.
politicianPred <- predict(bayesModel, newdata = politician)
getPredictionResponse(politicianPred)
## [1] democrat
## Levels: democrat republican
# Bayes' rule, as applied by the classifier:
# \[ p(k \mid x) = \frac{p(x \mid k) \times p(k)}{p(x)} \]
# \[ \text{posterior} = \frac{\text{likelihood} \times \text{prior}}{\text{evidence}} \]
## Y
## democrat republican
## 267 168
## $V1
## V1
## Y n y
## democrat 0.3953488 0.6046512
## republican 0.8121212 0.1878788
##
## $V2
## V2
## Y n y
## democrat 0.4979079 0.5020921
## republican 0.4932432 0.5067568
##
## $V3
## V3
## Y n y
## democrat 0.1115385 0.8884615
## republican 0.8658537 0.1341463
##
## $V4
## V4
## Y n y
## democrat 0.94594595 0.05405405
## republican 0.01212121 0.98787879
##
## $V5
## V5
## Y n y
## democrat 0.78431373 0.21568627
## republican 0.04848485 0.95151515
##
## $V6
## V6
## Y n y
## democrat 0.5232558 0.4767442
## republican 0.1024096 0.8975904
##
## $V7
## V7
## Y n y
## democrat 0.2277992 0.7722008
## republican 0.7592593 0.2407407
##
## $V8
## V8
## Y n y
## democrat 0.1711027 0.8288973
## republican 0.8471338 0.1528662
##
## $V9
## V9
## Y n y
## democrat 0.2419355 0.7580645
## republican 0.8848485 0.1151515
##
## $V10
## V10
## Y n y
## democrat 0.5285171 0.4714829
## republican 0.4424242 0.5575758
##
## $V11
## V11
## Y n y
## democrat 0.4941176 0.5058824
## republican 0.8679245 0.1320755
##
## $V12
## V12
## Y n y
## democrat 0.8554217 0.1445783
## republican 0.1290323 0.8709677
##
## $V13
## V13
## Y n y
## democrat 0.7103175 0.2896825
## republican 0.1392405 0.8607595
##
## $V14
## V14
## Y n y
## democrat 0.64980545 0.35019455
## republican 0.01863354 0.98136646
##
## $V15
## V15
## Y n y
## democrat 0.36254980 0.63745020
## republican 0.91025641 0.08974359
##
## $V16
## V16
## Y n y
## democrat 0.06486486 0.93513514
## republican 0.34246575 0.65753425
## Warning in makeTask(type = type, data = data, weights = weights, blocking =
## blocking, : Provided data is not a pure data.frame but from class tbl_df, hence
## it will be converted.
## Type len Def
## type discrete - C-classifica...
## cost numeric - 1
## nu numeric - 0.5
## class.weights numericvector <NA> -
## kernel discrete - radial
## degree integer - 3
## coef0 numeric - 0
## gamma numeric - -
## cachesize numeric - 40
## tolerance numeric - 0.001
## shrinking logical - TRUE
## cross integer - 0
## fitted logical - TRUE
## scale logicalvector <NA> TRUE
## Constr Req Tunable Trafo
## type C-classification,nu-classification - TRUE -
## cost 0 to Inf Y TRUE -
## nu -Inf to Inf Y TRUE -
## class.weights 0 to Inf - TRUE -
## kernel linear,polynomial,radial,sigmoid - TRUE -
## degree 1 to Inf Y TRUE -
## coef0 -Inf to Inf Y TRUE -
## gamma 0 to Inf Y TRUE -
## cachesize -Inf to Inf - TRUE -
## tolerance 0 to Inf - TRUE -
## shrinking - - TRUE -
## cross 0 to Inf - FALSE -
## fitted - - FALSE -
## scale - - TRUE -
## Starting parallelization in mode=socket with cpus=2.
# Random-search hyperparameter tuning of an SVM over kernel/degree/cost/gamma.
# spamTask, cvForTuning, svmParamSpace and randSearch are defined in an
# earlier chunk not shown here -- TODO confirm their definitions match the
# parameter-set printout below.
tunedSvmPars <- tuneParams("classif.svm", task = spamTask,
resampling = cvForTuning,
par.set = svmParamSpace, control = randSearch)
## [Tune] Started tuning learner classif.svm for parameter set:
## Type len Def Constr Req Tunable Trafo
## kernel discrete - - polynomial,radial,sigmoid - TRUE -
## degree integer - - 1 to 3 - TRUE -
## cost numeric - - 0.1 to 10 - TRUE -
## gamma numeric - - 0.1 to 10 - TRUE -
## With control class: TuneControlRandom
## Imputation value: 1
## Exporting objects to slaves for mode socket: .mlr.slave.options
## Mapping in parallel: mode = socket; level = mlr.tuneParams; cpus = 2; elements = 20.
## [Tune] Result: kernel=polynomial; degree=1; cost=6.76; gamma=6.92 : mmce.test.mean=0.0756193
## Stopped parallelization. All cleaned up.
## Tune result:
## Op. pars: kernel=polynomial; degree=1; cost=6.76; gamma=6.92
## mmce.test.mean=0.0756193
## $kernel
## [1] "polynomial"
##
## $degree
## [1] 1
##
## $cost
## [1] 6.756972
##
## $gamma
## [1] 6.918203
# Outer loop of nested cross-validation: 3-fold CV wrapped around the tuner.
outer <- makeResampleDesc("CV", iters = 3)
# Wrap the SVM learner so hyperparameter tuning (the inner cvForTuning loop)
# is repeated inside each outer fold, giving an unbiased performance estimate.
svmWrapper <- makeTuneWrapper("classif.svm", resampling = cvForTuning, par.set = svmParamSpace,
control = randSearch)
# NOTE(review): detectCores() can return NA or the machine's full core count
# (it reported 2 in the captured run below); consider a fixed, smaller cpu
# count on shared machines.
parallelStartSocket(cpus = detectCores())
## Starting parallelization in mode=socket with cpus=2.
## Exporting objects to slaves for mode socket: .mlr.slave.options
## Resampling: cross-validation
## Measures: mmce
## Mapping in parallel: mode = socket; level = mlr.resample; cpus = 2; elements = 3.
##
## Aggregated Result: mmce.test.mean=0.0778137
##
## Stopped parallelization. All cleaned up.
## Resample Result
## Task: spamTib
## Learner: classif.svm.tuned
## Aggr perf: mmce.test.mean=0.0778137
## Runtime: 163.162
## mmce.test.mean
## 92.21863
# Visualise the decision surface of a polynomial-kernel SVM on the spam task.
# https://mlr-org.com/docs/2015-07-28-visualisation-of-predictions/
plotLearnerPrediction(
  task = spamTask,
  learner = makeLearner("classif.svm", kernel = "polynomial")
)

### Exercise 3